Introduction to ggplot2

library(ggplot2) 
# Base R graphics on the built-in iris data ----

# Make the iris data frame available in the workspace.
data("iris")

# plot() on a whole data frame draws a scatterplot matrix of its columns.
plot(x = iris)

# Scatterplot of two columns: sepal width against sepal length.
plot(x = iris$Sepal.Length, y = iris$Sepal.Width)

# Histogram of a single numeric column.
hist(x = iris$Sepal.Length)

# One box per column of the data frame.
boxplot(x = iris)

# qplot(): the quick-plot interface of ggplot2 ----

# Scatterplot built directly from two vectors.
qplot(x = iris$Sepal.Length, y = iris$Sepal.Width)

# Columns can instead be looked up in a data frame supplied via `data`.
qplot(x = Sepal.Length, y = Sepal.Width, data = iris)

# The `geom` argument selects the plot type; "point" gives a scatterplot.
qplot(x = Sepal.Length, y = Sepal.Width, data = iris, geom = "point")

# "line" joins the observations with a line, ordered along the x-axis.
qplot(x = Sepal.Length, y = Sepal.Width, data = iris, geom = "line")

# Base-graphics counterpart for comparison: type = "l" joins the points in
# the order in which they appear in the data.
plot(x = iris$Sepal.Length, y = iris$Sepal.Width, type = "l")

# A character vector of geoms layers several plot types in one figure.
qplot(
  x = Sepal.Length, y = Sepal.Width, data = iris,
  geom = c("line", "point")
)

We can combine plot types.

# One boxplot of sepal length per species, with the raw observations drawn
# on top as points.
qplot(
  x = Species, y = Sepal.Length, data = iris,
  geom = c("boxplot", "point")
)

# As above, plus a "jitter" layer that draws the observations again with a
# small random displacement so that overlapping points become visible.
qplot(
  x = Species, y = Sepal.Length, data = iris,
  geom = c("boxplot", "point", "jitter")
)

In ggplot2, additional variables can be mapped to plot aesthetics including color, fill, shape, size, alpha, linetype.

# Sepal length by species, with the colour aesthetic mapped to the
# Sepal.Width column.
qplot(
  x = Species, y = Sepal.Length, data = iris,
  geom = c("boxplot", "jitter"),
  color = Sepal.Width
)

ggplot2 - mapping aesthetics variables

# Points plus a smoother; colour is mapped to Species and point size to
# Petal.Width.
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width
)

ggplot2 - faceting

Sometimes we want to look at the conditional distribution of a variable and visualize some characteristic of a dataset conditioning on the levels of some other variable. For this we use the facets argument.

Facet by columns.

# One panel per species; a one-sided formula (~Species) lays the panels out
# side by side.
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width,
  facets = ~Species
)

Facet by rows

# One panel per species; the two-sided formula (Species ~ .) puts Species on
# the rows, so the panels are stacked vertically.
qplot(
  x = Sepal.Width, y = Sepal.Length, data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width,
  facets = Species ~ .
)

On to ggplot2

Using our long-format data frame, we will further explore the iris dataset.

# Read the long-format iris data from disk. The plots below use its Width,
# Length, Species and flower_part columns.
iris_long <- readRDS(file = "iris_long.rds")

# A "very" simple plot: data and aesthetics only. No layer has been added
# yet, so no observations are drawn.
ggplot(iris_long, mapping = aes(x = Width, y = Length))

# Add a point layer so the observations are drawn.
ggplot(iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point()

# Split the plot into one panel per Species / flower_part combination, each
# panel with its own axis ranges (scales = "free").
ggplot(iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point() +
  facet_wrap(facets = Species ~ flower_part, scales = "free")

# Exercise: try moving a `+` to the start of the following line. R then
# treats the preceding line as a complete expression, so the layers after it
# are not added to the plot.

# Add a straight-line (lm) fit to every panel.
ggplot(iris_long, mapping = aes(x = Width, y = Length)) +
  geom_point() +
  facet_wrap(facets = Species ~ flower_part, scales = "free") +
  geom_smooth(method = "lm")

# Keep the same data available under the name iris_cast as well.
iris_cast <- iris_long

ggplot2 and the grammar of graphics

Let’s add the options ‘shape’ and ‘color’ to the aes call.

# Build the plot and keep it as an object. Shape and colour are both mapped
# to flower_part, with one facet_grid() column per species and a
# straight-line (lm) fit for each group.
my_plot <- ggplot(
  data = iris_cast,
  mapping = aes(
    x = Width, y = Length,
    shape = flower_part, color = flower_part
  )
) +
  geom_point() +
  facet_grid(~Species) +
  geom_smooth(method = "lm")

# Printing the object draws the plot.
my_plot

# A ggplot object can be extended with `+` after it has been created. Here
# the black-and-white theme is applied with a larger base font size.
my_plot + theme_bw(base_size = 24)

Using facet_wrap instead of facet_grid.

# One panel per flower part; shape and colour now distinguish the species.
ggplot(
  data = iris_cast,
  mapping = aes(x = Width, y = Length, shape = Species, color = Species)
) +
  geom_point() +
  facet_wrap(~flower_part) +
  geom_smooth(method = "lm")

# Saving your plot ----
# With no `plot` argument, ggsave() writes the most recently displayed plot
# (the facet_wrap figure above). Width and height are in inches by default.
ggsave(filename = "myplot.png", width = 5, height = 5)

Exploring some themes:

# ggthemes supplies the additional themes used below.
library(ggthemes)

# my_plot restyled with the Excel-like theme.
my_plot +
  theme_excel(base_size = 24)

# my_plot restyled with the Wall Street Journal theme.
my_plot +
  theme_wsj(base_size = 18)

Let's try a larger dataset.

# First some basic plots ----

# Price against carat on the raw scale.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()

# The same relationship with both variables on a log10 scale.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point()

# Now we'll change the look of the plot ----

# A fixed colour is set outside aes(), so every point is blue.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(color = "blue")

# What can you see from this plot?

# Let's make the points more visible ----

# Semi-transparent points: dense regions appear darker.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 1 / 20)

# Shape 1 draws hollow circles.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(shape = 1)

# Very small points.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(size = 0.1)

# Now let's add a smoother, and look at the relationship by cut ----

# With no `method`, geom_smooth() chooses one from the data size: loess for
# fewer than 1,000 observations, otherwise a GAM (mgcv::gam). diamonds is
# far larger than that, so a GAM is fitted here rather than a loess curve.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point() +
  geom_smooth()

# Colour mapped in ggplot(): both layers inherit it, so the points are
# coloured by cut and a separate smooth is fitted for each cut.
ggplot(diamonds, aes(x = log10(carat), y = log10(price), color = cut)) +
  geom_point(alpha = 1 / 10) +
  geom_smooth()

# Colour mapped only inside geom_point(): the points are coloured by cut,
# but a single smooth is fitted to all of the data.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point(aes(color = cut), alpha = 1 / 10) +
  geom_smooth()

# Now let's see the linear model ----
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point() +
  geom_smooth(method = "lm")

# Fit the same straight line explicitly and store its residuals as a new
# column of diamonds.
mod <- lm(log10(price) ~ log10(carat), data = diamonds)

diamonds$resid <- residuals(mod)

# Colour each point by its residual from the fitted line.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point(aes(colour = resid)) +
  geom_smooth(method = "lm")

# Boxplots ----

# Distribution of log10(price) for each cut. x is mapped to `cut` at the
# plot level so that the x-axis title reads "cut"; overriding a plot-level
# log10(carat) mapping inside the layer would leave the axis titled
# "log10(carat)" even though it shows the cut categories.
ggplot(diamonds, aes(x = cut, y = log10(price))) +
  geom_boxplot()

# A 'violin' plot ----

# log10(carat) is rounded to the nearest 0.1 with plyr::round_any() and used
# as the grouping variable, giving one violin per 0.1-wide slice of
# log10(carat). scale = "width" gives every violin the same maximum width.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_violin(
    aes(group = plyr::round_any(log10(carat), 0.1)),
    scale = "width"
  )

# Histograms of depth ----

# Plain old histogram; xlim() limits the x-axis to depths from 56 to 67.
ggplot(diamonds, aes(x = depth)) +
  geom_histogram(binwidth = 0.2) +
  xlim(56, 67)

# Layered histogram by cut: the bars are filled by cut.
ggplot(diamonds, aes(x = depth, fill = cut)) +
  geom_histogram(binwidth = 0.2) +
  xlim(56, 67)

# Histograms separated by cut: one panel per cut.
ggplot(diamonds, aes(x = depth)) +
  geom_histogram(binwidth = 0.2) +
  facet_wrap(~cut) +
  xlim(56, 67)

# Same, but with different scales per panel. binwidth is given explicitly so
# the bins match the other histograms; without it geom_histogram() falls
# back to 30 bins and prints a message asking for a better value.
ggplot(diamonds, aes(x = depth)) +
  geom_histogram(binwidth = 0.2) +
  facet_wrap(~cut, scales = "free") +
  xlim(56, 67)

# Similar to the layered histogram, but drawn with lines.
ggplot(diamonds, aes(x = depth, color = cut)) +
  geom_freqpoly(binwidth = 0.2) +
  xlim(56, 67)

# Density instead of count on the y-axis, which makes cuts with very
# different numbers of diamonds comparable. after_stat(density) replaces the
# deprecated `..density..` notation and needs ggplot2 >= 3.3.0.
ggplot(diamonds, aes(x = depth, color = cut)) +
  geom_freqpoly(aes(y = after_stat(density)), binwidth = 0.2) +
  xlim(56, 67)

# Some plots of the price ----

# One violin of log10(price) per cut.
ggplot(diamonds, aes(x = cut, y = log10(price))) +
  geom_violin()

# Density curves of log10(price), one per cut, distinguished by colour.
ggplot(diamonds, aes(x = log10(price))) +
  geom_density(aes(color = cut))

# Heat maps: information on the joint distribution ----

# Standard: semi-transparent points, so dense regions appear darker.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point(alpha = 1 / 10)

# Coloured heat map: counts in rectangular bins.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_bin2d()

# Contoured heat map: contour lines of a 2-d density estimate.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_density2d()

# Joint distribution of carat and price by diamond color ----

# Standard: colour the points by the `color` column, the same variable the
# faceted plots below split on. (Colouring by `clarity` here would not match
# this section's "by color" comparison.)
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point(aes(color = color))

# Using a facet: one panel per diamond color.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point() +
  facet_wrap(~color)

# Using a facet and a heat map: binned counts within each color panel.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  facet_wrap(~color)

# Adding a reference line to the faceted heat map can be useful ----

# Fit the overall log-log linear model once and reuse it, both for showing
# the coefficients and for drawing the line.
mod <- lm(log10(price) ~ log10(carat), data = diamonds)
mod_coef <- coef(mod)
mod_coef
##  (Intercept) log10(carat) 
##     3.669207     1.675817

# The same overall fit is drawn in white in every color panel, so each panel
# can be compared against a common reference. Coefficients are picked by
# name rather than by position so the intent is explicit.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  geom_abline(
    intercept = mod_coef[["(Intercept)"]],
    slope = mod_coef[["log10(carat)"]],
    colour = "white"
  ) +
  facet_wrap(~color)